#install.packages("timetk")

Load Libraries

# Load tidyverse, anomalize, tibbletime and timetk

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(anomalize)
## == Use anomalize to improve your Forecasts by 50%! =============================
## Business Science offers a 1-hour course - Lab #18: Time Series Anomaly Detection!
## </> Learn more at: https://university.business-science.io/p/learning-labs-pro </>
library(tibbletime)
## 
## Attaching package: 'tibbletime'
## The following object is masked from 'package:stats':
## 
##     filter
library(timetk)

Reading data

# Import the supermarket sales export and preview its first five rows
df <- read.csv(
  file = "C:/Users/user/Downloads/Supermarket_Sales_Forecasting - Sales.csv"
)
head(df, n = 5)
##        Date    Sales
## 1  1/5/2019 548.9715
## 2  3/8/2019  80.2200
## 3  3/3/2019 340.5255
## 4 1/27/2019 489.0480
## 5  2/8/2019 634.3785
str(df)
## 'data.frame':    1000 obs. of  2 variables:
##  $ Date : chr  "1/5/2019" "3/8/2019" "3/3/2019" "1/27/2019" ...
##  $ Sales: num  549 80.2 340.5 489 634.4 ...
# Parse the real transaction dates (month/day/year strings, see str() above)
# rather than replacing them with made-up consecutive dates: the anomaly
# detection below is only meaningful on the true time index.
df$Date <- as.Date(df$Date, format = "%m/%d/%Y")
# Fail loudly if any date string did not match the expected format.
stopifnot(!anyNA(df$Date))
# The file holds several unordered rows per day; the decomposition steps need
# one ordered observation per date, so collapse to a daily total.
# NOTE(review): sum assumed to be the intended daily aggregate - confirm.
df <- aggregate(Sales ~ Date, data = df, FUN = sum)
# NOTE: rendered output further down predates this change; re-knit to refresh.

# Decompose Sales, flag anomalies in the remainder, recompose the bounds,
# and keep only the rows marked as anomalous
as_tibble(df) %>%
  time_decompose(target = Sales) %>%
  anomalize(target = remainder) %>%
  time_recompose() %>%
  filter(anomaly == "Yes")
## Converting from tbl_df to tbl_time.
## Auto-index message: index = Date
## frequency = 7 days
## trend = 91 days
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## # A time tibble: 0 x 10
## # Index: Date
## # ... with 10 variables: Date <date>, observed <dbl>, season <dbl>,
## #   trend <dbl>, remainder <dbl>, remainder_l1 <dbl>, remainder_l2 <dbl>,
## #   anomaly <chr>, recomposed_l1 <dbl>, recomposed_l2 <dbl>

Visual of anomalies detected

# Same decomposition of Sales, drawn as a decomposition plot with the
# anomalize() results
as_tibble(df) %>%
  time_decompose(target = Sales) %>%
  anomalize(target = remainder) %>%
  plot_anomaly_decomposition()
## Converting from tbl_df to tbl_time.
## Auto-index message: index = Date
## frequency = 7 days
## trend = 91 days

Anomaly Detection

# Build the anomaly table used by the steps that follow.
# The decomposition target must be the measured series (Sales). Targeting
# Date decomposes the date index itself, which yields near-zero remainders
# and therefore never flags anything.
# merge = TRUE keeps the original columns next to the decomposition output.
# as_tibble() replaces the deprecated as.tibble().
# NOTE: rendered output below predates this fix; re-knit to refresh it.
df_anomalized <- df %>%
  as_tibble() %>%
  time_decompose(Sales, merge = TRUE) %>%
  anomalize(remainder) %>%
  time_recompose()
## Warning: `as.tibble()` was deprecated in tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Converting from tbl_df to tbl_time.
## Auto-index message: index = Date
## frequency = 7 days
## trend = 91 days
# Compact column-by-column preview of the anomaly table
glimpse(df_anomalized)
## Rows: 1,000
## Columns: 11
## $ Date          <date> 2022-02-08, 2022-02-09, 2022-02-10, 2022-02-11, 2022-02~
## $ Sales         <dbl> 548.9715, 80.2200, 340.5255, 489.0480, 634.3785, 627.616~
## $ observed      <dbl> 19031, 19032, 19033, 19034, 19035, 19036, 19037, 19038, ~
## $ season        <dbl> -9.527570e-13, -3.130033e-13, 8.005211e-13, 2.848151e-14~
## $ trend         <dbl> 19031, 19032, 19033, 19034, 19035, 19036, 19037, 19038, ~
## $ remainder     <dbl> 7.275958e-12, 7.275958e-12, 7.275958e-12, 7.275958e-12, ~
## $ remainder_l1  <dbl> -8.003553e-11, -8.003553e-11, -8.003553e-11, -8.003553e-~
## $ remainder_l2  <dbl> 7.275958e-11, 7.275958e-11, 7.275958e-11, 7.275958e-11, ~
## $ anomaly       <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "N~
## $ recomposed_l1 <dbl> 19031, 19032, 19033, 19034, 19035, 19036, 19037, 19038, ~
## $ recomposed_l2 <dbl> 19031, 19032, 19033, 19034, 19035, 19036, 19037, 19038, ~

Visualizing anomalies

# Plot the anomaly table; dots are drawn mostly transparent (alpha 0.2)
plot_anomalies(df_anomalized, ncol = 5, alpha_dots = 0.2)

Interactive graphical anomaly representation

# timetk diagnostics plot of Sales against Date, with two facet columns
timetk::plot_anomaly_diagnostics(
  df,
  .date_var = Date,
  .value = Sales,
  .facet_ncol = 2
)
## frequency = 7 observations per 1 week
## trend = 92 observations per 3 months

To find the exact points that are anomalies

# Tabulate timetk's anomaly diagnostics and keep only the flagged rows
df %>%
  timetk::tk_anomaly_diagnostics(.date_var = Date, .value = Sales) %>%
  filter(anomaly == "Yes")
## frequency = 7 observations per 1 week
## trend = 92 observations per 3 months
## # A tibble: 0 x 11
## # ... with 11 variables: Date <date>, observed <dbl>, season <dbl>,
## #   trend <dbl>, remainder <dbl>, seasadj <dbl>, remainder_l1 <dbl>,
## #   remainder_l2 <dbl>, anomaly <chr>, recomposed_l1 <dbl>, recomposed_l2 <dbl>